# load required libraries
library("lme4")
library("ggplot2")
library("here")
library("tidyverse")
library("emmeans")
library("brms")
library("DHARMa")
library("glmmTMB")
library("performance")
library("scales")
library("grateful")

1 Exploration and visualisation

A pseudonymized/codified dataset was used as a precaution due to ethical considerations (patient-level clinical data). The full dataset is available upon request (see manuscript data availability statement). For the in vitro assays specifically, age and demographic location variables have been removed, in addition to codifying the levels of the variables not of direct interest.

# load the data
df <- read_csv(here("./data/ivSCAs_codified.csv"), show_col_types = FALSE) %>%
  rename(HBB.genotype = `HBB Genotype`) %>% 
  rename(Sample.type = `Sample type`) %>% 
  select(ID, Group, Sample.type, HBB.genotype, Bloodgroup, Reticulocytes, Gender, Symptomatic,
         # Age, # omitted due to data sensitivity
         # Village, Ethnicity, # omitted due to data sensitivity + missing values
         `Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi`,
         `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`,
         `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
         `Raw counts_Retics_Reticulocytes`, 
         `Raw counts_Retics_total RBCs`,
         SCR_CHOLINE_Gen1_35to40hpi, # keep SCR for plots
         # no choline models
         `Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi`,
         `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`,
         `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`,
         # different timepoints
         `Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi`,
         `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`,
         `Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`,
         `Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi`,
         `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`,
         `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`,
         # late stage
         `Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi`,
         `Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`,
         `Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`,
         `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`,
         # parasitemia
         `Raw counts_RBC_CHOLINE_Gen0_0to5hpi`,
         `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`,
         # reticulocytes
         `Raw counts_Retics_Reticulocytes`,
         `Raw counts_Retics_total RBCs`,
         ) %>% 
  mutate(HBB.genotype = case_when(
    HBB.genotype == 0 ~ "HbAA",
    HBB.genotype == 1 ~ "HbAC",
    HBB.genotype == 2 ~ "HbAS",
    HBB.genotype == 3 ~ "HbSS",
    HBB.genotype == 4 ~ "HbSC",
  )) %>%
  filter(!Group == "AA CTRL") %>% 
  filter_all(any_vars(!is.na(.))) %>% # remove empty lines/samples
  mutate(across(c(ID, Group, Sample.type, HBB.genotype, Bloodgroup, Gender, 
                  Symptomatic), as.factor)) %>% 
  mutate(HBB.genotype = fct_relevel(HBB.genotype, "HbAA")) %>%  # set HbAA as baseline reference
  mutate(Group = fct_relevel(Group, "AA")) %>%  # set HbAA as baseline reference
  mutate()
str(df)
## tibble [28 × 28] (S3: tbl_df/tbl/data.frame)
##  $ ID                                                : Factor w/ 28 levels "2","3","4","6",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Group                                             : Factor w/ 5 levels "AA","AC","AS",..: 3 3 2 1 2 2 3 4 4 5 ...
##  $ Sample.type                                       : Factor w/ 1 level "TEST": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HBB.genotype                                      : Factor w/ 5 levels "HbAA","HbAC",..: 3 3 2 1 2 2 3 4 4 5 ...
##  $ Bloodgroup                                        : Factor w/ 5 levels "0","1","2","3",..: 1 3 3 5 5 1 5 5 1 2 ...
##  $ Reticulocytes                                     : num [1:28] 1.78 1.65 1.35 1 2.48 0.62 2.8 5.71 1.84 4.62 ...
##  $ Gender                                            : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 1 ...
##  $ Symptomatic                                       : Factor w/ 2 levels "0","1": 2 1 1 1 2 1 1 1 1 1 ...
##  $ Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi           : num [1:28] 5473 4956 3198 3413 3199 ...
##  $ Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi   : num [1:28] 4028 4047 2831 3031 2854 ...
##  $ Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi    : num [1:28] 1435 904 361 372 332 ...
##  $ Raw counts_Retics_Reticulocytes                   : num [1:28] 19 13 13 11 17 5 21 42 16 43 ...
##  $ Raw counts_Retics_total RBCs                      : num [1:28] 1065 788 965 1098 685 ...
##  $ SCR_CHOLINE_Gen1_35to40hpi                        : num [1:28] 26.3 18.3 11.3 10.9 10.4 ...
##  $ Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi        : num [1:28] 3998 3600 2328 1694 1669 ...
##  $ Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi: num [1:28] 2184 2385 1601 2025 1671 ...
##  $ Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi : num [1:28] 1362 1120 558 830 827 ...
##  $ Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi           : num [1:28] 5760 5160 3904 2499 2149 ...
##  $ Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi   : num [1:28] 4308 4266 3568 2263 1945 ...
##  $ Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi    : num [1:28] 1440 886 332 236 202 277 195 51 102 103 ...
##  $ Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi: num [1:28] 2643 2474 1786 1293 1191 ...
##  $ Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi : num [1:28] 1348 1123 533 398 476 ...
##  $ Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi           : num [1:28] 2229 1865 1725 797 649 ...
##  $ Raw counts_Late stages_CHOLINE_Gen0_35to40hpi     : num [1:28] 891 613 1065 425 339 ...
##  $ Raw counts_Late stages_CHOLINE_Gen1_25to30hpi     : num [1:28] 301 302 482 167 90 62 29 29 52 64 ...
##  $ Raw counts_Late stages_CHOLINE_Gen1_35to40hpi     : num [1:28] 795 786 908 1164 1020 ...
##  $ Raw counts_RBC_CHOLINE_Gen0_0to5hpi               : num [1:28] 950 993 888 93494 95898 ...
##  $ Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi             : num [1:28] 8 12 20 144 125 402 140 186 705 232 ...
summary(df)
##        ID     Group   Sample.type HBB.genotype Bloodgroup Reticulocytes  
##  2      : 1   AA: 5   TEST:28     HbAA: 5      0:13       Min.   :0.570  
##  3      : 1   AC: 5               HbAC: 5      1: 4       1st Qu.:1.333  
##  4      : 1   AS: 4               HbAS: 4      2: 3       Median :1.715  
##  6      : 1   SC: 2               HbSC: 2      3: 3       Mean   :2.731  
##  7      : 1   SS:12               HbSS:12      4: 5       3rd Qu.:4.515  
##  8      : 1                                               Max.   :6.720  
##  (Other):22                                                              
##  Gender Symptomatic Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi
##  0:13   0:22        Min.   : 952                           
##  1:15   1: 6        1st Qu.:2283                           
##                     Median :3198                           
##                     Mean   :3081                           
##                     3rd Qu.:3704                           
##                     Max.   :5987                           
##                                                            
##  Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi
##  Min.   : 854                                   
##  1st Qu.:2000                                   
##  Median :2842                                   
##  Mean   :2709                                   
##  3rd Qu.:3208                                   
##  Max.   :5330                                   
##                                                 
##  Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi Raw counts_Retics_Reticulocytes
##  Min.   :  60.0                                 Min.   :  5.00                 
##  1st Qu.: 161.2                                 1st Qu.: 13.00                 
##  Median : 263.0                                 Median : 20.00                 
##  Mean   : 361.9                                 Mean   : 34.43                 
##  3rd Qu.: 505.2                                 3rd Qu.: 50.25                 
##  Max.   :1435.0                                 Max.   :115.00                 
##                                                                                
##  Raw counts_Retics_total RBCs SCR_CHOLINE_Gen1_35to40hpi
##  Min.   : 528.0               Min.   : 4.83             
##  1st Qu.: 881.2               1st Qu.: 7.06             
##  Median :1250.5               Median :10.43             
##  Mean   :1221.3               Mean   :10.92             
##  3rd Qu.:1473.0               3rd Qu.:14.06             
##  Max.   :1893.0               Max.   :26.27             
##                                                         
##  Raw counts_iRBCs_NO CHOLINE_Gen1_25to30hpi
##  Min.   : 121                              
##  1st Qu.:1154                              
##  Median :1466                              
##  Mean   :1751                              
##  3rd Qu.:2291                              
##  Max.   :4081                              
##                                            
##  Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi
##  Min.   : 735                                      
##  1st Qu.:1429                                      
##  Median :1730                                      
##  Mean   :1886                                      
##  3rd Qu.:2228                                      
##  Max.   :4146                                      
##                                                    
##  Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi
##  Min.   :  75.0                                   
##  1st Qu.: 259.0                                   
##  Median : 491.0                                   
##  Mean   : 696.6                                   
##  3rd Qu.: 971.8                                   
##  Max.   :2283.0                                   
##                                                   
##  Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi
##  Min.   : 570                           
##  1st Qu.:1592                           
##  Median :2048                           
##  Mean   :2481                           
##  3rd Qu.:3022                           
##  Max.   :5760                           
##                                         
##  Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi
##  Min.   : 516                                   
##  1st Qu.:1366                                   
##  Median :1882                                   
##  Mean   :2186                                   
##  3rd Qu.:2800                                   
##  Max.   :5184                                   
##                                                 
##  Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi
##  Min.   :  51.0                                
##  1st Qu.: 117.0                                
##  Median : 198.5                                
##  Mean   : 289.2                                
##  3rd Qu.: 324.5                                
##  Max.   :1440.0                                
##                                                
##  Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi
##  Min.   :  90.0                                    
##  1st Qu.: 925.8                                    
##  Median :1194.5                                    
##  Mean   :1349.8                                    
##  3rd Qu.:1731.5                                    
##  Max.   :2851.0                                    
##                                                    
##  Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi
##  Min.   :  17.0                                   
##  1st Qu.: 168.8                                   
##  Median : 240.5                                   
##  Mean   : 396.0                                   
##  3rd Qu.: 544.0                                   
##  Max.   :1348.0                                   
##                                                   
##  Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi
##  Min.   :  521                          
##  1st Qu.:  674                          
##  Median : 1725                          
##  Mean   :31611                          
##  3rd Qu.:90308                          
##  Max.   :97941                          
##  NA's   :7                              
##  Raw counts_Late stages_CHOLINE_Gen0_35to40hpi
##  Min.   :   7.0                               
##  1st Qu.:  44.5                               
##  Median : 126.0                               
##  Mean   : 227.7                               
##  3rd Qu.: 304.2                               
##  Max.   :1065.0                               
##  NA's   :6                                    
##  Raw counts_Late stages_CHOLINE_Gen1_25to30hpi
##  Min.   :  0.00                               
##  1st Qu.:  9.00                               
##  Median : 30.50                               
##  Mean   : 72.39                               
##  3rd Qu.: 67.50                               
##  Max.   :482.00                               
##                                               
##  Raw counts_Late stages_CHOLINE_Gen1_35to40hpi
##  Min.   :   2.0                               
##  1st Qu.:  21.0                               
##  Median : 246.5                               
##  Mean   : 389.4                               
##  3rd Qu.: 547.2                               
##  Max.   :2137.0                               
##                                               
##  Raw counts_RBC_CHOLINE_Gen0_0to5hpi Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi
##  Min.   :  888                       Min.   :   8.0                       
##  1st Qu.:87834                       1st Qu.: 158.0                       
##  Median :91119                       Median : 385.5                       
##  Mean   :80932                       Mean   : 531.6                       
##  3rd Qu.:93972                       3rd Qu.: 718.2                       
##  Max.   :96324                       Max.   :3301.0                       
## 
ggplot(df, aes(x = Group, y = SCR_CHOLINE_Gen1_35to40hpi)) +
  geom_boxplot(aes(fill = Group), outlier.size = 2, outlier.colour = "grey", 
               width = 0.8, position = position_dodge(width = 0.8)) +  # Reduce space between boxplots
  geom_point(aes(fill = Group), alpha = 0.8, shape = 21, colour= "#115e67a6",
             position = position_jitter(width = 0.1, height = 0.1)) +  # Bubble plot
  scale_fill_manual(values = c("AA CTRL" ="paleturquoise","AA" ="paleturquoise", 
                               "AC" = "palegreen", 
                               "AS" = "thistle",
                               "SC" = "lightpink1",
                               "SS"= "peachpuff")) +  # Custom color for bubbles and boxplots
  theme_minimal() +  # Clean theme
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),  # Increase font size for x-axis ticks
    axis.text.y = element_text(size = 14),  # Increase font size for y-axis ticks
    axis.title.x = element_text(size = 16),  # Increase font size for x-axis title
    axis.title.y = element_text(size = 16),  # Increase font size for y-axis title
    strip.text = element_text(size = 12),
    panel.grid.major = element_blank(),   # Remove major gridlines
    panel.grid.minor = element_blank(),   # Remove minor gridlines
    axis.line = element_line(color = "black")  # Add black axis lines
  ) +
  coord_cartesian(ylim = c(0, 50))  # Set y-axis limit from 0 to 50

2 Final models

2.1 SCR vs genotype - Choline Gen1 35-40 hpi

The data was modelled using generalized linear model using the beta-binomial family and a single dispersion parameter to account for overdispersion.

\[SCR \sim HBB.genotype\]

This model was chosen to keep things interpretable, and take into account the fact that some variables are not covered by a lot of samples (e.g. sparse/uneven representation across levels of blood group). Additionally, since most estimates remain consistent in directionality and magnitude and LRTs tend to favour the simple model, we opt for this parsimonious model.

Although this approach cannot fully exclude confounding or interaction effects, the direction and magnitude of estimated effects were broadly consistent across alternative model specifications (with only the ivSCA test of SCR (+Choline, Gen1 35-40hpi) exhibiting unstable p-values on the border of our chosen significance level). However, because of this, sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes rather than exact p-values.

See the model comparison section for more details.

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     333.0     341.0    -160.5     321.0        22 
## 
## 
## Dispersion parameter for betabinomial family ():  126 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -2.0539     0.1269 -16.179  < 2e-16 ***
## HBB.genotypeHbAC   0.2242     0.1720   1.304  0.19226    
## HBB.genotypeHbAS   0.5495     0.1719   3.196  0.00139 ** 
## HBB.genotypeHbSC  -0.3801     0.2667  -1.425  0.15408    
## HBB.genotypeHbSS  -0.4474     0.1600  -2.796  0.00518 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions are OK:

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.243, p-value = 0.242
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.251 0.215 Inf     0.814     1.923    1   1.304  0.7690
##  HbAS / HbAA      1.732 0.298 Inf     1.128     2.661    1   3.196  0.0056
##  HbSC / HbAA      0.684 0.182 Inf     0.351     1.331    1  -1.425  0.6163
##  HbSS / HbAA      0.639 0.102 Inf     0.429     0.953    1  -2.796  0.0207
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The above table not only shows the estimated odds ratio and its corresponding p-value, but also the confidence interval of this odds ratio (= asymp.LCL and asymp.UCL).

These p-values are adjusted for multiple testing using the Bonferroni method.

The estimates are already transformed from the log-scale, so they can be directly interpreted as odds ratios (OR).

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.1.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

df <- df %>% 
  mutate(sexual = `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`) %>% 
  mutate(asexual = `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) %>% 
  mutate(total = `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`)

brm_model <- brm(
  bf(
    sexual | trials(total) ~ HBB.genotype
  ),
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: beta_binomial 
##   Links: mu = logit 
## Formula: sexual | trials(total) ~ HBB.genotype 
##    Data: df (Number of observations: 28) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -2.06      0.15    -2.37    -1.76 1.00     8856    10188
## HBB.genotypeHbAC     0.22      0.21    -0.19     0.64 1.00    10664    12753
## HBB.genotypeHbAS     0.55      0.21     0.14     0.96 1.00    10149    11274
## HBB.genotypeHbSC    -0.42      0.34    -1.14     0.20 1.00    12927    10803
## HBB.genotypeHbSS    -0.43      0.19    -0.80    -0.04 1.00     9899    11516
## 
## Further Distributional Parameters:
##     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi    94.96     28.92    47.30   160.14 1.00    14959    13486
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)

The Bayesian estimated odds ratios for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA      1.253     0.804     1.847
##  HbAS / HbAA      1.728     1.086     2.521
##  HbSC / HbAA      0.669     0.279     1.147
##  HbSS / HbAA      0.648     0.430     0.923
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.1.2 Non-parametric Kruskal-Wallis and Wilcoxon rank-sum alternative

Overall effect of genotype is significant. However, the pairwise effects appear to be borderline; some p-values become non-significant when correcting for all pairwises comparisons.

kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ 
               df$HBB.genotype)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) by df$HBB.genotype
## Kruskal-Wallis chi-squared = 16.512, df = 4, p-value = 0.002403
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`),
               df$HBB.genotype,
               p.adjust.method = "holm")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) and df$HBB.genotype 
## 
##      HbAA  HbAC  HbAS  HbSC 
## HbAC 0.762 -     -     -    
## HbAS 0.222 0.762 -     -    
## HbSC 0.762 0.571 0.667 -    
## HbSS 0.215 0.039 0.040 0.791
## 
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`),
               df$HBB.genotype,
               p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) and df$HBB.genotype 
## 
##      HbAA  HbAC  HbAS  HbSC 
## HbAC 1.000 -     -     -    
## HbAS 0.317 1.000 -     -    
## HbSC 1.000 0.952 1.000 -    
## HbSS 0.268 0.039 0.044 1.000
## 
## P value adjustment method: bonferroni

2.2 SCR vs genotype - Choline Gen1 25-30 hpi

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~  
##     HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     339.7     347.6    -163.8     327.7        22 
## 
## 
## Dispersion parameter for betabinomial family (): 55.3 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -2.39455    0.21058 -11.371  < 2e-16 ***
## HBB.genotypeHbAC  0.31997    0.27980   1.144  0.25281    
## HBB.genotypeHbAS  0.80608    0.27424   2.939  0.00329 ** 
## HBB.genotypeHbSC -0.09187    0.40511  -0.227  0.82060    
## HBB.genotypeHbSS  0.20006    0.24421   0.819  0.41267    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions shows slight deviation for residual vs predicted plots. Adding bloodgroup fixes this, but warns about a potential outlier (see below).

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.4538, p-value = 0.228
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.377 0.385 Inf     0.685      2.77    1   1.144  1.0000
##  HbAS / HbAA      2.239 0.614 Inf     1.129      4.44    1   2.939  0.0132
##  HbSC / HbAA      0.912 0.370 Inf     0.332      2.51    1  -0.227  1.0000
##  HbSS / HbAA      1.221 0.298 Inf     0.664      2.25    1   0.819  1.0000
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.2.1 Adding bloodgroup to see if this fixes model assumptions

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype + Bloodgroup,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~  
##     HBB.genotype + Bloodgroup
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     340.3     353.6    -160.1     320.3        18 
## 
## 
## Dispersion parameter for betabinomial family (): 73.2 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -2.2077     0.2429  -9.090   <2e-16 ***
## HBB.genotypeHbAC   0.1324     0.2895   0.457   0.6475    
## HBB.genotypeHbAS   0.6401     0.2723   2.350   0.0188 *  
## HBB.genotypeHbSC  -0.2605     0.3805  -0.685   0.4935    
## HBB.genotypeHbSS  -0.1400     0.2701  -0.518   0.6043    
## Bloodgroup1        0.5274     0.2269   2.324   0.0201 *  
## Bloodgroup2        0.0228     0.2542   0.090   0.9285    
## Bloodgroup3       -0.4202     0.3154  -1.332   0.1828    
## Bloodgroup4       -0.1299     0.2264  -0.574   0.5662    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions shows slight deviation for residual vs predicted plots. Adding bloodgroup fixes this, but warns about a single potential outlier (see below).

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.4224, p-value = 0.194
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.142 0.330 Inf     0.554      2.35    1   0.457  1.0000
##  HbAS / HbAA      1.897 0.517 Inf     0.961      3.74    1   2.350  0.0750
##  HbSC / HbAA      0.771 0.293 Inf     0.298      1.99    1  -0.685  1.0000
##  HbSS / HbAA      0.869 0.235 Inf     0.443      1.71    1  -0.518  1.0000
## 
## Results are averaged over the levels of: Bloodgroup 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.2.2 Non-parametric Kruskal-Wallis and Wilcoxon rank-sum alternative

kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) ~ 
               df$HBB.genotype)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) by df$HBB.genotype
## Kruskal-Wallis chi-squared = 7.8392, df = 4, p-value = 0.09765
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`),
               df$HBB.genotype,
               p.adjust.method = "holm")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) and df$HBB.genotype 
## 
##      HbAA HbAC HbAS HbSC
## HbAC 1.00 -    -    -   
## HbAS 0.16 1.00 -    -   
## HbSC 1.00 1.00 1.00 -   
## HbSS 1.00 1.00 0.38 1.00
## 
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`),
               df$HBB.genotype,
               p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_25to30hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_25to30hpi`) and df$HBB.genotype 
## 
##      HbAA HbAC HbAS HbSC
## HbAC 1.00 -    -    -   
## HbAS 0.16 1.00 -    -   
## HbSC 1.00 1.00 1.00 -   
## HbSS 1.00 1.00 0.42 1.00
## 
## P value adjustment method: bonferroni

2.3 SCR vs genotype - No choline Gen1 35-40 hpi

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     360.9     368.9    -174.4     348.9        22 
## 
## 
## Dispersion parameter for betabinomial family (): 58.6 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.74999    0.12533  -5.984 2.18e-09 ***
## HBB.genotypeHbAC  0.02208    0.17683   0.125    0.901    
## HBB.genotypeHbAS  0.05586    0.18661   0.299    0.765    
## HBB.genotypeHbSC -1.20940    0.30539  -3.960 7.49e-05 ***
## HBB.genotypeHbSS -0.89456    0.16171  -5.532 3.17e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions are OK.

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1843, p-value = 0.338
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.022 0.1810 Inf     0.657     1.590    1   0.125  1.0000
##  HbAS / HbAA      1.057 0.1970 Inf     0.663     1.685    1   0.299  1.0000
##  HbSC / HbAA      0.298 0.0911 Inf     0.139     0.640    1  -3.960  0.0003
##  HbSS / HbAA      0.409 0.0661 Inf     0.273     0.612    1  -5.532  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.3.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

df <- df %>% 
  mutate(sexual = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) %>% 
  mutate(asexual = `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`) %>% 
  mutate(total = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_35to40hpi` + `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_35to40hpi`)

brm_model <- brm(
  bf(
    sexual | trials(total) ~ HBB.genotype
  ),
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: beta_binomial 
##   Links: mu = logit 
## Formula: sexual | trials(total) ~ HBB.genotype 
##    Data: df (Number of observations: 28) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -0.75      0.15    -1.05    -0.47 1.00    12477    12788
## HBB.genotypeHbAC     0.02      0.21    -0.39     0.42 1.00    13809    13609
## HBB.genotypeHbAS     0.06      0.22    -0.38     0.48 1.00    14248    13657
## HBB.genotypeHbSC    -1.27      0.38    -2.09    -0.58 1.00    15828    11027
## HBB.genotypeHbSS    -0.88      0.19    -1.25    -0.51 1.00    13160    13965
## 
## Further Distributional Parameters:
##     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi    46.89     14.10    23.71    78.34 1.00    16739    14422
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)

The Bayesian estimated odds ratios for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA      1.023     0.647     1.478
##  HbAS / HbAA      1.059     0.652     1.564
##  HbSC / HbAA      0.288     0.101     0.522
##  HbSS / HbAA      0.413     0.269     0.576
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.4 SCR vs genotype - No choline Gen1 25-30 hpi

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`,
        `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`, `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) ~  
##     HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     327.3     335.3    -157.6     315.3        22 
## 
## 
## Dispersion parameter for betabinomial family (): 69.8 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.10038    0.12987  -8.473  < 2e-16 ***
## HBB.genotypeHbAC  0.02967    0.18261   0.162 0.870922    
## HBB.genotypeHbAS  0.24697    0.18488   1.336 0.181596    
## HBB.genotypeHbSC -1.02698    0.30657  -3.350 0.000809 ***
## HBB.genotypeHbSS -0.64371    0.16314  -3.946 7.96e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions are OK.

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1618, p-value = 0.42
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.030 0.1880 Inf     0.653      1.63    1   0.162  1.0000
##  HbAS / HbAA      1.280 0.2370 Inf     0.807      2.03    1   1.336  0.7264
##  HbSC / HbAA      0.358 0.1100 Inf     0.167      0.77    1  -3.350  0.0032
##  HbSS / HbAA      0.525 0.0857 Inf     0.350      0.79    1  -3.946  0.0003
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.4.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

df <- df %>% 
  mutate(sexual = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) %>% 
  mutate(asexual = `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`) %>% 
  mutate(total = `Raw counts_Sexual iRBCs_NO CHOLINE_Gen1_25to30hpi` + `Raw counts_Asexual iRBCs_NO CHOLINE_Gen1_25to30hpi`)

brm_model <- brm(
  bf(
    sexual | trials(total) ~ HBB.genotype
  ),
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: beta_binomial 
##   Links: mu = logit 
## Formula: sexual | trials(total) ~ HBB.genotype 
##    Data: df (Number of observations: 28) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -1.11      0.15    -1.43    -0.82 1.00    11892    12119
## HBB.genotypeHbAC     0.04      0.22    -0.37     0.48 1.00    13051    13455
## HBB.genotypeHbAS     0.26      0.22    -0.18     0.69 1.00    13628    13205
## HBB.genotypeHbSC    -1.08      0.39    -1.90    -0.38 1.00    15222    11857
## HBB.genotypeHbSS    -0.62      0.19    -0.99    -0.24 1.00    12353    13434
## 
## Further Distributional Parameters:
##     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi    54.25     17.02    26.72    92.62 1.00    16337    14642
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)

The Bayesian estimated odds ratios for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA      1.043     0.646     1.547
##  HbAS / HbAA      1.293     0.806     1.930
##  HbSC / HbAA      0.350     0.131     0.646
##  HbSS / HbAA      0.535     0.351     0.758
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.5 Late stage proportions at different timepoints

Note: there are a number of missing values for some of the raw iRBC counts at gen 0 and the raw late stages at gen 0.

2.5.1 Gen0_35to40hpi

model <- glmmTMB(
  cbind(`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`,
        `Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` - `Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`, `Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` -  
##     `Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`) ~ HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     287.7     294.0    -137.9     275.7        15 
## 
## 
## Dispersion parameter for betabinomial family (): 5.06 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.95428    0.49939  -1.911 0.056016 .  
## HBB.genotypeHbAC  0.41845    0.68680   0.609 0.542336    
## HBB.genotypeHbAS -0.01763    0.65397  -0.027 0.978490    
## HBB.genotypeHbSC -0.45791    1.02936  -0.445 0.656427    
## HBB.genotypeHbSS -2.06863    0.62116  -3.330 0.000868 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions are not OK…

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.0033828, p-value = 0.016
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 0.306
## alternative hypothesis: two.sided
# plot residuals per group
# plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.520 1.0400 Inf    0.2734     8.448    1   0.609  1.0000
##  HbAS / HbAA      0.983 0.6430 Inf    0.1918     5.032    1  -0.027  1.0000
##  HbSC / HbAA      0.633 0.6510 Inf    0.0484     8.274    1  -0.445  1.0000
##  HbSS / HbAA      0.126 0.0785 Inf    0.0268     0.596    1  -3.330  0.0035
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.5.1.1 Kruskal-Wallis alternative

kruskal.test(df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi` / 
               df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` ~ 
               df$HBB.genotype)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` by df$HBB.genotype
## Kruskal-Wallis chi-squared = 9.4844, df = 4, p-value = 0.05007
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi` / 
               df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi`,
               df$HBB.genotype,
               p.adjust.method = "holm")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` and df$HBB.genotype 
## 
##      HbAA HbAC HbAS HbSC
## HbAC 1.00 -    -    -   
## HbAS 1.00 1.00 -    -   
## HbSC 1.00 1.00 1.00 -   
## HbSS 0.44 0.44 0.24 1.00
## 
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi` / 
               df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi`,
               df$HBB.genotype,
               p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Late stages_CHOLINE_Gen0_35to40hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen0_35to40hpi` and df$HBB.genotype 
## 
##      HbAA HbAC HbAS HbSC
## HbAC 1.00 -    -    -   
## HbAS 1.00 1.00 -    -   
## HbSC 1.00 1.00 1.00 -   
## HbSS 0.49 0.49 0.24 1.00
## 
## P value adjustment method: bonferroni

2.5.2 Gen1_25to30hpi

model <- glmmTMB(
  cbind(`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`,
        `Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` - `Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`, `Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` -  
##     `Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`) ~ HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     284.7     292.7    -136.4     272.7        22 
## 
## 
## Dispersion parameter for betabinomial family (): 42.7 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -4.2105     0.4598  -9.158   <2e-16 ***
## HBB.genotypeHbAC   1.0071     0.5414   1.860   0.0629 .  
## HBB.genotypeHbAS   1.0262     0.5613   1.828   0.0675 .  
## HBB.genotypeHbSC   1.2094     0.6411   1.886   0.0592 .  
## HBB.genotypeHbSS   0.1628     0.5086   0.320   0.7489    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions show slight deviations for quantiles.

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.5013, p-value = 0.294
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.84459, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       2.74 1.480 Inf     0.708     10.58    1   1.860  0.2515
##  HbAS / HbAA       2.79 1.570 Inf     0.687     11.34    1   1.828  0.2701
##  HbSC / HbAA       3.35 2.150 Inf     0.676     16.62    1   1.886  0.2369
##  HbSS / HbAA       1.18 0.599 Inf     0.330      4.19    1   0.320  1.0000
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.5.2.1 Kruskal-Wallis alternative

kruskal.test(df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi` / 
               df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` ~ 
               df$HBB.genotype)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` by df$HBB.genotype
## Kruskal-Wallis chi-squared = 7.117, df = 4, p-value = 0.1298
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi` / 
               df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi`,
               df$HBB.genotype,
               p.adjust.method = "holm")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` and df$HBB.genotype 
## 
##      HbAA HbAC HbAS HbSC
## HbAC 1.00 -    -    -   
## HbAS 1.00 1.00 -    -   
## HbSC 1.00 1.00 1.00 -   
## HbSS 1.00 0.74 0.42 1.00
## 
## P value adjustment method: holm
pairwise.wilcox.test(df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi` / 
               df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi`,
               df$HBB.genotype,
               p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$`Raw counts_Late stages_CHOLINE_Gen1_25to30hpi`/df$`Raw counts_iRBCs_CHOLINE_Gen1_25to30hpi` and df$HBB.genotype 
## 
##      HbAA HbAC HbAS HbSC
## HbAC 1.00 -    -    -   
## HbAS 1.00 1.00 -    -   
## HbSC 1.00 1.00 1.00 -   
## HbSS 1.00 0.82 0.42 1.00
## 
## P value adjustment method: bonferroni

2.5.3 Gen1_35to40hpi

model <- glmmTMB(
  cbind(`Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi` - `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`, `Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi` -  
##     `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     367.0     375.0    -177.5     355.0        22 
## 
## 
## Dispersion parameter for betabinomial family ():  8.7 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -1.4511     0.3466  -4.186 2.83e-05 ***
## HBB.genotypeHbAC  -0.2143     0.4916  -0.436 0.662965    
## HBB.genotypeHbAS  -0.5160     0.5411  -0.954 0.340311    
## HBB.genotypeHbSC  -0.6327     0.6963  -0.909 0.363563    
## HBB.genotypeHbSS  -1.5736     0.4610  -3.413 0.000642 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions are OK.

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2606, p-value = 0.474
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 0.532
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      0.807 0.3970 Inf    0.2364     2.756    1  -0.436  1.0000
##  HbAS / HbAA      0.597 0.3230 Inf    0.1545     2.306    1  -0.954  1.0000
##  HbSC / HbAA      0.531 0.3700 Inf    0.0933     3.024    1  -0.909  1.0000
##  HbSS / HbAA      0.207 0.0956 Inf    0.0655     0.656    1  -3.413  0.0026
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.5.3.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

df <- df %>% 
  mutate(late = `Raw counts_Late stages_CHOLINE_Gen1_35to40hpi`) %>% 
  mutate(total = `Raw counts_iRBCs_CHOLINE_Gen1_35to40hpi`)

brm_model <- brm(
  bf(
    late | trials(total) ~ HBB.genotype
  ),
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: beta_binomial 
##   Links: mu = logit 
## Formula: late | trials(total) ~ HBB.genotype 
##    Data: df (Number of observations: 28) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -1.46      0.39    -2.29    -0.72 1.00    12175    11604
## HBB.genotypeHbAC    -0.21      0.55    -1.28     0.88 1.00    12093    12089
## HBB.genotypeHbAS    -0.53      0.61    -1.77     0.64 1.00    12683    12317
## HBB.genotypeHbSC    -0.77      0.84    -2.62     0.72 1.00    13861    11921
## HBB.genotypeHbSS    -1.48      0.50    -2.42    -0.44 1.00    11510    12232
## 
## Further Distributional Parameters:
##     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi     7.74      2.40     3.76    13.09 1.00    12943    13439
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)

The Bayesian estimated odds ratios for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA      0.812   0.17327     2.029
##  HbAS / HbAA      0.593   0.09346     1.606
##  HbSC / HbAA      0.492   0.00885     1.643
##  HbSS / HbAA      0.224   0.06318     0.546
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.6 Parasitemia vs genotype

There does not appear to be a strong relationship between parasitemia and genotypes.

A quick test (not included) showed that a normal binomial model is a poor fit for the data, so we opt for the beta-binomial one immediately.

model <- glmmTMB(
  cbind(`Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`,
        `Raw counts_RBC_CHOLINE_Gen0_0to5hpi` - `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`, `Raw counts_RBC_CHOLINE_Gen0_0to5hpi` -  
##     `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`) ~ HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     402.3     410.3    -195.2     390.3        22 
## 
## 
## Dispersion parameter for betabinomial family ():  214 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -4.99277    0.32581 -15.324   <2e-16 ***
## HBB.genotypeHbAC  0.16185    0.43406   0.373    0.709    
## HBB.genotypeHbAS  0.02599    0.47568   0.055    0.956    
## HBB.genotypeHbSC  0.02415    0.58446   0.041    0.967    
## HBB.genotypeHbSS  0.11327    0.36993   0.306    0.759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model assumptions are OK.

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.3572, p-value = 0.362
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA       1.18 0.510 Inf     0.398      3.48    1   0.373  1.0000
##  HbAS / HbAA       1.03 0.488 Inf     0.313      3.37    1   0.055  1.0000
##  HbSC / HbAA       1.02 0.599 Inf     0.238      4.41    1   0.041  1.0000
##  HbSS / HbAA       1.12 0.414 Inf     0.445      2.82    1   0.306  1.0000
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.6.0.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

There does not appear to be a strong relationship between parasitemia and genotypes.

df <- df %>% 
  mutate(irbc = `Raw counts_iRBCs_CHOLINE_Gen0_0to5hpi`) %>% 
  mutate(total = `Raw counts_RBC_CHOLINE_Gen0_0to5hpi`)

brm_model <- brm(
  bf(
    irbc | trials(total) ~ HBB.genotype
  ),
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: beta_binomial 
##   Links: mu = logit 
## Formula: irbc | trials(total) ~ HBB.genotype 
##    Data: df (Number of observations: 28) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -4.96      0.38    -5.78    -4.28 1.00     8330     8259
## HBB.genotypeHbAC     0.16      0.50    -0.83     1.17 1.00     9822    10027
## HBB.genotypeHbAS     0.01      0.55    -1.11     1.07 1.00    10337    10672
## HBB.genotypeHbSC    -0.13      0.74    -1.76     1.15 1.00    10511    10074
## HBB.genotypeHbSS     0.16      0.42    -0.61     1.06 1.00     9190     9144
## 
## Further Distributional Parameters:
##     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi   161.89     49.45    79.95   271.70 1.00    13135    12156
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)

The Bayesian estimated odds ratios for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA      1.169    0.2936      2.72
##  HbAS / HbAA      1.013    0.1924      2.49
##  HbSC / HbAA      0.928    0.0422      2.60
##  HbSS / HbAA      1.148    0.4176      2.49
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

These odds ratios have a large overlap with 1 and do not indicate a strong association between parasitemia and genotype.

The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.7 Reticulocyte proportion

There does not appear to be a strong relationship between reticulocyte proportion and Hbb genotype, except for HbSS (significant wald z, although the Bayesian credible interval is close to 1 [1.113; 4.87]), and HbSC (not significant) do show higher proportions. More (balanced) data might be able to make stronger conclusions.

A quick test (not included) showed that a normal binomial model is a poor fit for the data, so we opt for the beta-binomial one immediately.

model <- glmmTMB(
  cbind(`Raw counts_Retics_Reticulocytes`,
        `Raw counts_Retics_total RBCs` - `Raw counts_Retics_Reticulocytes`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Retics_Reticulocytes`, `Raw counts_Retics_total RBCs` -  
##     `Raw counts_Retics_Reticulocytes`) ~ HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     237.2     245.2    -112.6     225.2        22 
## 
## 
## Dispersion parameter for betabinomial family ():  164 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -4.1328     0.2811 -14.704   <2e-16 ***
## HBB.genotypeHbAC  -0.1458     0.4013  -0.363   0.7163    
## HBB.genotypeHbAS   0.1174     0.4018   0.292   0.7702    
## HBB.genotypeHbSC   0.8451     0.4195   2.014   0.0440 *  
## HBB.genotypeHbSS   0.9382     0.3022   3.105   0.0019 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Some of these estimates appear significant, but when correcting for multiple testing this is no longer the case (except for the HbSS genotype). The magnitude of the effect size for SS and SC is about double, with fairly high standard errors.

Model assumptions are OK.

# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1131, p-value = 0.592
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      0.864 0.347 Inf     0.317      2.36    1  -0.363  1.0000
##  HbAS / HbAA      1.125 0.452 Inf     0.412      3.07    1   0.292  1.0000
##  HbSC / HbAA      2.328 0.977 Inf     0.816      6.64    1   2.014  0.1759
##  HbSS / HbAA      2.555 0.772 Inf     1.201      5.44    1   3.105  0.0076
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

The estimated parasitemia per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response", adjust="bonferroni")

2.7.1 Bayesian estimates

As a more conservative estimate, we can use a Bayesian approach while fitting a beta-binomial generalized linear model. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).

df <- df %>%
  mutate(retics = `Raw counts_Retics_Reticulocytes`) %>%
  mutate(retics_total = `Raw counts_Retics_total RBCs`)

brm_model <- brm(
  bf(
    retics | trials(retics_total) ~ HBB.genotype
  ),
  family = beta_binomial(link = "logit"),
  data = df,
  iter = 5000,
  chains = 8,
  seed = 42,
  cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
##  Family: beta_binomial 
##   Links: mu = logit 
## Formula: retics | trials(retics_total) ~ HBB.genotype 
##    Data: df (Number of observations: 28) 
##   Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
##          total post-warmup draws = 20000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -4.13      0.33    -4.84    -3.53 1.00     7097     7228
## HBB.genotypeHbAC    -0.14      0.46    -1.06     0.78 1.00     9165     9642
## HBB.genotypeHbAS     0.10      0.47    -0.84     1.03 1.00     9086     9955
## HBB.genotypeHbSC     0.76      0.52    -0.34     1.72 1.00     8993    10679
## HBB.genotypeHbSS     0.95      0.35     0.30     1.70 1.00     7707     8279
## 
## Further Distributional Parameters:
##     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi   124.61     39.84    60.38   215.20 1.00    12685    12196
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(brm_model)

The Bayesian estimated odds ratios for the different genotype contrasts are as follows:

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(brm_model, ~ HBB.genotype)

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio lower.HPD upper.HPD
##  HbAC / HbAA      0.866     0.265      1.94
##  HbAS / HbAA      1.110     0.284      2.44
##  HbSC / HbAA      2.187     0.441      4.92
##  HbSS / HbAA      2.545     1.113      4.87
## 
## Point estimate displayed: median 
## Results are back-transformed from the log odds ratio scale 
## HPD interval probability: 0.95

These odds ratios are very close to 1 (HbSS), or they include it in their credible interval. The association between reticulocyte proportion and genotype cannot reliably be shown based on this data, but it does appear that HbSS (and perhaps HbSC) have higher reticulocyte counts.

The Bayesian estimated SCR per group is, adjusted for multiple testing using the Bonferroni method:

confint(emm, type="response")

3 Model comparison

No random effects are needed since we do not have technical replicates here, unlike the evSCA.

3.1 Binomial generalized linear model

Model assumptions are violated for the regular binomial model: there is clear evidence of overdispersion.

model <- glm(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype, 
  family = binomial, data = df)

summary(model)
## 
## Call:
## glm(formula = cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, 
##     `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype, 
##     family = binomial, data = df)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -2.05716    0.02455 -83.811  < 2e-16 ***
## HBB.genotypeHbAC  0.22328    0.03366   6.633  3.3e-11 ***
## HBB.genotypeHbAS  0.60589    0.03096  19.571  < 2e-16 ***
## HBB.genotypeHbSC -0.53386    0.06498  -8.216  < 2e-16 ***
## HBB.genotypeHbSS -0.46398    0.03270 -14.187  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2281.83  on 27  degrees of freedom
## Residual deviance:  685.49  on 23  degrees of freedom
## AIC: 900.81
## 
## Number of Fisher Scoring iterations: 4
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2926, p-value < 2.2e-16
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio      SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.250 0.04210 Inf     1.140     1.370    1   6.633  <.0001
##  HbAS / HbAA      1.833 0.05670 Inf     1.684     1.994    1  19.571  <.0001
##  HbAS / HbAC      1.466 0.04370 Inf     1.352     1.590    1  12.848  <.0001
##  HbSC / HbAA      0.586 0.03810 Inf     0.491     0.700    1  -8.216  <.0001
##  HbSC / HbAC      0.469 0.03020 Inf     0.393     0.559    1 -11.753  <.0001
##  HbSC / HbAS      0.320 0.02020 Inf     0.269     0.380    1 -18.077  <.0001
##  HbSS / HbAA      0.629 0.02060 Inf     0.575     0.687    1 -14.187  <.0001
##  HbSS / HbAC      0.503 0.01590 Inf     0.461     0.548    1 -21.756  <.0001
##  HbSS / HbAS      0.343 0.00984 Inf     0.317     0.371    1 -37.291  <.0001
##  HbSS / HbSC      1.072 0.06860 Inf     0.901     1.277    1   1.093  0.8101
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 5 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 5 estimates 
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "sidak",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.250 0.0421 Inf     1.150     1.360    1   6.633  <.0001
##  HbAS / HbAA      1.833 0.0567 Inf     1.697     1.980    1  19.571  <.0001
##  HbSC / HbAA      0.586 0.0381 Inf     0.499     0.689    1  -8.216  <.0001
##  HbSS / HbAA      0.629 0.0206 Inf     0.580     0.682    1 -14.187  <.0001
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: sidak method for 4 tests 
## Tests are performed on the log odds ratio scale

3.2 OLRE GLMM model

We can use a per-observation random effect to account for overdispersion. This seems to alleviate the problems.

df$obs <- seq_len(nrow(df))

model <- glmer(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + (1 | obs), 
  family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype + (1 | obs)
##    Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     332.2     340.2    -160.1     320.2        22 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.51780 -0.14454 -0.01208  0.13350  0.81015 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  obs    (Intercept) 0.0852   0.2919  
## Number of obs: 28, groups:  obs, 28
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -2.0860     0.1337 -15.605  < 2e-16 ***
## HBB.genotypeHbAC   0.2315     0.1883   1.229  0.21903    
## HBB.genotypeHbAS   0.5637     0.1989   2.834  0.00460 ** 
## HBB.genotypeHbSC  -0.3972     0.2545  -1.561  0.11861    
## HBB.genotypeHbSS  -0.4620     0.1599  -2.890  0.00385 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) HBB.HAC HBB.HAS HBB.HSC
## HBB.gntyHAC -0.710                        
## HBB.gntyHAS -0.672  0.477                 
## HBB.gntyHSC -0.525  0.373   0.353         
## HBB.gntyHSS -0.836  0.593   0.562   0.439
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1713, p-value = 0.446
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.260 0.2370 Inf     0.754     2.107    1   1.229  0.7343
##  HbAS / HbAA      1.757 0.3500 Inf     1.021     3.023    1   2.834  0.0371
##  HbAS / HbAC      1.394 0.2760 Inf     0.812     2.394    1   1.676  0.4489
##  HbSC / HbAA      0.672 0.1710 Inf     0.336     1.346    1  -1.561  0.5228
##  HbSC / HbAC      0.533 0.1350 Inf     0.267     1.066    1  -2.475  0.0962
##  HbSC / HbAS      0.383 0.1000 Inf     0.187     0.782    1  -3.669  0.0023
##  HbSS / HbAA      0.630 0.1010 Inf     0.407     0.974    1  -2.890  0.0315
##  HbSS / HbAC      0.500 0.0795 Inf     0.324     0.771    1  -4.361  0.0001
##  HbSS / HbAS      0.359 0.0615 Inf     0.225     0.572    1  -5.983  <.0001
##  HbSS / HbSC      0.937 0.2190 Inf     0.496     1.773    1  -0.278  0.9987
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 5 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 5 estimates 
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "sidak",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.260 0.237 Inf     0.788     2.015    1   1.229  0.6280
##  HbAS / HbAA      1.757 0.350 Inf     1.071     2.884    1   2.834  0.0183
##  HbSC / HbAA      0.672 0.171 Inf     0.357     1.267    1  -1.561  0.3965
##  HbSS / HbAA      0.630 0.101 Inf     0.423     0.938    1  -2.890  0.0153
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: sidak method for 4 tests 
## Tests are performed on the log odds ratio scale

3.3 Beta-binomial GLM

Another approach to take the overdispersion into account is to use a beta binomial GLM, this time with a single dispersion parameter (instead of one per group like in the ex vivo assay). For consistency with the ex vivo analysis, we will opt for beta-binomial model. Note that here there is no need for a per-genotype dispersion factor.

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     333.0     341.0    -160.5     321.0        22 
## 
## 
## Dispersion parameter for betabinomial family ():  126 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -2.0539     0.1269 -16.179  < 2e-16 ***
## HBB.genotypeHbAC   0.2242     0.1720   1.304  0.19226    
## HBB.genotypeHbAS   0.5495     0.1719   3.196  0.00139 ** 
## HBB.genotypeHbSC  -0.3801     0.2667  -1.425  0.15408    
## HBB.genotypeHbSS  -0.4474     0.1600  -2.796  0.00518 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.243, p-value = 0.242
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.251 0.2150 Inf     0.783     2.000    1   1.304  0.6888
##  HbAS / HbAA      1.732 0.2980 Inf     1.084     2.769    1   3.196  0.0121
##  HbAS / HbAC      1.384 0.2280 Inf     0.884     2.167    1   1.979  0.2763
##  HbSC / HbAA      0.684 0.1820 Inf     0.330     1.415    1  -1.425  0.6113
##  HbSC / HbAC      0.546 0.1430 Inf     0.267     1.116    1  -2.307  0.1425
##  HbSC / HbAS      0.395 0.1030 Inf     0.193     0.807    1  -3.549  0.0036
##  HbSS / HbAA      0.639 0.1020 Inf     0.413     0.989    1  -2.796  0.0414
##  HbSS / HbAC      0.511 0.0777 Inf     0.337     0.773    1  -4.419  0.0001
##  HbSS / HbAS      0.369 0.0561 Inf     0.244     0.559    1  -6.559  <.0001
##  HbSS / HbSC      0.935 0.2380 Inf     0.467     1.870    1  -0.265  0.9989
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 5 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 5 estimates 
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "sidak",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.251 0.215 Inf     0.815     1.921    1   1.304  0.5743
##  HbAS / HbAA      1.732 0.298 Inf     1.129     2.658    1   3.196  0.0056
##  HbSC / HbAA      0.684 0.182 Inf     0.352     1.329    1  -1.425  0.4879
##  HbSS / HbAA      0.639 0.102 Inf     0.429     0.952    1  -2.796  0.0206
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: sidak method for 4 tests 
## Tests are performed on the log odds ratio scale

3.4 Accounting for additional factors

The univariate analyses already showed associations between HBB genotype and some of the clinical/demographic factors like blood group, reticulocytes, age and gender. However, accounting for these in the model does not improve overall fit as assessed by a likelihood ratio test of the complex vs simple model.

Moreover, due to the small sample size and the small and sparse groups, we fear we would not have an adequate amount of representative samples for each of the different levels to make any strong conclusions about the association with the sexual conversion rate.

The high collinearity between blood group and genotype might also pose a problem, but this is related to the unbalanced representation of blood groups across HBB genotypes (identifyability issues?).

The direction of estimates remains consistent regardless of which variables are added to the model, but magnitudes and p-value do fluctuate for the HbAS/HbAA and HbSS/HbAA contrasts. This is mainly a concern for the SCR (+Choline, Gen1 35-40hpi) comparison, where the estimated effects shrink for these two contrasts (P grows above 0.05) depending on which additional factors are added.

We therefore focus mainly on a descriptive modelling approach with simple parsimonous models with the single parameter of interest (Hbb genotype). That being said, this approach cannot fully exclude confounding or interaction effects, and sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes over exact p-values.

model_simple <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype,
  family = betabinomial(link = "logit"),
  data = df
)

3.4.1 Multiple variables

The model assumptions are still OK for the more complex model.

Note that the difference between HbAS and HbAA is less pronounced in the full model compared to the genotype-only one, likely because of the strong correlation between blood group and genotype.

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + 
    Bloodgroup +
    log(Reticulocytes) + Gender + Symptomatic
    # + scale(Age)
    # + Village + Ethnicity # missing values for a few of the samples
  ,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype + Bloodgroup + log(Reticulocytes) + Gender +  
##         Symptomatic
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     342.9     360.2    -158.5     316.9        15 
## 
## 
## Dispersion parameter for betabinomial family ():  148 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.93846    0.19409  -9.987   <2e-16 ***
## HBB.genotypeHbAC    0.11535    0.20912   0.552   0.5812    
## HBB.genotypeHbAS    0.40620    0.19476   2.086   0.0370 *  
## HBB.genotypeHbSC   -0.53981    0.29569  -1.826   0.0679 .  
## HBB.genotypeHbSS   -0.70566    0.28850  -2.446   0.0144 *  
## Bloodgroup1         0.24984    0.19087   1.309   0.1906    
## Bloodgroup2        -0.03578    0.21671  -0.165   0.8689    
## Bloodgroup3        -0.30168    0.22902  -1.317   0.1878    
## Bloodgroup4        -0.17577    0.20338  -0.864   0.3875    
## log(Reticulocytes)  0.05181    0.13119   0.395   0.6929    
## Gender1             0.06908    0.16017   0.431   0.6663    
## Symptomatic1       -0.01213    0.14242  -0.085   0.9321    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2419, p-value = 0.232
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
# pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.122 0.235 Inf     0.666      1.89    1   0.552  1.0000
##  HbAS / HbAA      1.501 0.292 Inf     0.923      2.44    1   2.086  0.1480
##  HbSC / HbAA      0.583 0.172 Inf     0.278      1.22    1  -1.826  0.2716
##  HbSS / HbAA      0.494 0.142 Inf     0.240      1.02    1  -2.446  0.0578
## 
## Results are averaged over the levels of: Bloodgroup, Gender, Symptomatic 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale
model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + 
    Bloodgroup +
    log(Reticulocytes) + Gender + Symptomatic
    # + scale(Age)
    # + Village + Ethnicity # missing values for a few of the samples
  ,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype + Bloodgroup + log(Reticulocytes) + Gender +  
##         Symptomatic
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     342.9     360.2    -158.5     316.9        15 
## 
## 
## Dispersion parameter for betabinomial family ():  148 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.93846    0.19409  -9.987   <2e-16 ***
## HBB.genotypeHbAC    0.11535    0.20912   0.552   0.5812    
## HBB.genotypeHbAS    0.40620    0.19476   2.086   0.0370 *  
## HBB.genotypeHbSC   -0.53981    0.29569  -1.826   0.0679 .  
## HBB.genotypeHbSS   -0.70566    0.28850  -2.446   0.0144 *  
## Bloodgroup1         0.24984    0.19087   1.309   0.1906    
## Bloodgroup2        -0.03578    0.21671  -0.165   0.8689    
## Bloodgroup3        -0.30168    0.22902  -1.317   0.1878    
## Bloodgroup4        -0.17577    0.20338  -0.864   0.3875    
## log(Reticulocytes)  0.05181    0.13119   0.395   0.6929    
## Gender1             0.06908    0.16017   0.431   0.6663    
## Symptomatic1       -0.01213    0.14242  -0.085   0.9321    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2419, p-value = 0.232
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.122 0.2350 Inf     0.634     1.985    1   0.552  0.9818
##  HbAS / HbAA      1.501 0.2920 Inf     0.882     2.553    1   2.086  0.2262
##  HbAS / HbAC      1.338 0.2330 Inf     0.832     2.150    1   1.671  0.4518
##  HbSC / HbAA      0.583 0.1720 Inf     0.260     1.306    1  -1.826  0.3586
##  HbSC / HbAC      0.519 0.1600 Inf     0.224     1.206    1  -2.122  0.2105
##  HbSC / HbAS      0.388 0.1090 Inf     0.181     0.832    1  -3.385  0.0064
##  HbSS / HbAA      0.494 0.1420 Inf     0.225     1.085    1  -2.446  0.1033
##  HbSS / HbAC      0.440 0.1270 Inf     0.200     0.967    1  -2.846  0.0359
##  HbSS / HbAS      0.329 0.0864 Inf     0.161     0.673    1  -4.235  0.0002
##  HbSS / HbSC      0.847 0.2320 Inf     0.401     1.789    1  -0.605  0.9743
## 
## Results are averaged over the levels of: Bloodgroup, Gender, Symptomatic 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 5 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: tukey method for comparing a family of 5 estimates 
## Tests are performed on the log odds ratio scale
# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "sidak",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.122 0.235 Inf     0.667      1.89    1   0.552  0.9692
##  HbAS / HbAA      1.501 0.292 Inf     0.924      2.44    1   2.086  0.1400
##  HbSC / HbAA      0.583 0.172 Inf     0.279      1.22    1  -1.826  0.2452
##  HbSS / HbAA      0.494 0.142 Inf     0.241      1.01    1  -2.446  0.0565
## 
## Results are averaged over the levels of: Bloodgroup, Gender, Symptomatic 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: sidak method for 4 tests 
## Tests are performed on the log odds ratio scale

There are strong correlations between genotype and blood group (already observed in univariate risk analysis). The other factors appear to be less correlated.

performance::check_collinearity(model)
anova(model, model_simple, test = "LRT")

The LRT does not show a significantly better model fit for the more complex model.

3.4.2 Association between genotype and reticulocytes

There is a strong association between Hbb genotype and reticulocytes, but reticulocyte count itself does not seem to be associated with SCR.

ggplot(df, 
       aes(x = HBB.genotype, y = log(Reticulocytes), color = HBB.genotype)) +
  geom_boxplot() +
  geom_jitter(width = 0.1)

ggplot(df, aes(x = log(Reticulocytes),
               y = `Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` / 
                         `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`,
               color = HBB.genotype,
               fill = HBB.genotype
               )) +
  geom_jitter(height = 0.05, width = 0, alpha = 0.3)

Adding it as a variable to control for its potentially confounding effect results in a model that still meets all assumptions. However, sample size is quite small and groups are not balanced, so likely we do not have enough power to resolve both. Moreover, the effect of reticulocytes is not significant, and the direction and magnitude of the Hbb genotype effect stays consistent, although the p-value is no longer significant for HbSS/HbAA (the Hb/AS effect remains the same).

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + 
    log(Reticulocytes), 
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype + log(Reticulocytes)
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     334.8     344.1    -160.4     320.8        21 
## 
## 
## Dispersion parameter for betabinomial family ():  127 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -2.04178    0.12954 -15.761  < 2e-16 ***
## HBB.genotypeHbAC    0.21718    0.17218   1.261  0.20718    
## HBB.genotypeHbAS    0.55403    0.17167   3.227  0.00125 ** 
## HBB.genotypeHbSC   -0.34388    0.27842  -1.235  0.21679    
## HBB.genotypeHbSS   -0.39856    0.19364  -2.058  0.03957 *  
## log(Reticulocytes) -0.04705    0.10659  -0.441  0.65890    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2398, p-value = 0.258
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
# pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "sidak",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio    SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.243 0.214 Inf     0.809      1.91    1   1.261  0.6049
##  HbAS / HbAA      1.740 0.299 Inf     1.135      2.67    1   3.227  0.0050
##  HbSC / HbAA      0.709 0.197 Inf     0.354      1.42    1  -1.235  0.6237
##  HbSS / HbAA      0.671 0.130 Inf     0.414      1.09    1  -2.058  0.1491
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: sidak method for 4 tests 
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)
anova(model, model_simple, test = "LRT")

In a model with just reticulocytes, its effect does appear to be significant.

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ log(Reticulocytes), 
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     log(Reticulocytes)
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     345.7     349.7    -169.8     339.7        25 
## 
## 
## Dispersion parameter for betabinomial family (): 61.7 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.8735     0.1002 -18.700  < 2e-16 ***
## log(Reticulocytes)  -0.3281     0.1038  -3.161  0.00157 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.6419, p-value = 0.056
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$Reticulocytes)

3.4.3 Association between genotype and blood group

Checking for any confounding effect of blood group on the relation between SCR and genotype is difficult due to the small and unbalanced group sizes; some blood groups only contain a single genotype, and taken together with the small sample size would lead to subgroups with only 1 or 2 samples.

table(df$Bloodgroup, df$HBB.genotype)
##    
##     HbAA HbAC HbAS HbSC HbSS
##   0    1    2    2    1    7
##   1    0    0    0    0    4
##   2    0    2    1    0    0
##   3    2    0    0    0    1
##   4    2    1    1    1    0
ggplot(data = df %>% group_by(Bloodgroup) %>% count(HBB.genotype),
       aes(x = Bloodgroup, y = n, fill = HBB.genotype)) + 
  geom_bar(stat = "identity", position = position_dodge()) + 
  ylab("Frequency") +
  xlab("Blood group") +
  scale_fill_viridis_d() +
  theme_bw()

ggplot(df, aes(x = Group, y = SCR_CHOLINE_Gen1_35to40hpi, fill = Bloodgroup)) +
  geom_boxplot(alpha = 0.8) +
  geom_point(aes(fill = Bloodgroup), size = 4, shape = 21, position = position_jitterdodge()) +
  theme_minimal()

In a model with only blood group and genotype, blood group itself is not significant. The effect of genotype is less pronounced though, with smaller magnitude and larger p-values for HbAS (but not for HbSS), similar to the model with all variables. The direction and magnitude of estimates remains similar.

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ HBB.genotype + Bloodgroup,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     HBB.genotype + Bloodgroup
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     337.2     350.6    -158.6     317.2        18 
## 
## 
## Dispersion parameter for betabinomial family ():  146 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.90934    0.16409 -11.636  < 2e-16 ***
## HBB.genotypeHbAC  0.09039    0.19217   0.470 0.638093    
## HBB.genotypeHbAS  0.42523    0.18534   2.294 0.021769 *  
## HBB.genotypeHbSC -0.49251    0.26450  -1.862 0.062598 .  
## HBB.genotypeHbSS -0.65825    0.19107  -3.445 0.000571 ***
## Bloodgroup1       0.24787    0.18898   1.312 0.189649    
## Bloodgroup2       0.02735    0.17268   0.158 0.874150    
## Bloodgroup3      -0.26549    0.21600  -1.229 0.219037    
## Bloodgroup4      -0.10952    0.15268  -0.717 0.473157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2499, p-value = 0.202
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)

# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
# pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))

# perform only comparisons against AA
AA = c(1,0,0,0,0)
AC = c(0,1,0,0,0)
AS = c(0,0,1,0,0)
SC = c(0,0,0,1,0)
SS = c(0,0,0,0,1)

contrast(emm, method = list("HbAC / HbAA" = AC - AA,
                            "HbAS / HbAA" = AS - AA,
                            "HbSC / HbAA" = SC - AA,
                            "HbSS / HbAA" = SS - AA),
         adjust = "bonferroni",
         type="response",
         infer = c(TRUE, TRUE)
         )
##  contrast    odds.ratio     SE  df asymp.LCL asymp.UCL null z.ratio p.value
##  HbAC / HbAA      1.095 0.2100 Inf     0.677     1.769    1   0.470  1.0000
##  HbAS / HbAA      1.530 0.2840 Inf     0.963     2.431    1   2.294  0.0871
##  HbSC / HbAA      0.611 0.1620 Inf     0.316     1.183    1  -1.862  0.2504
##  HbSS / HbAA      0.518 0.0989 Inf     0.321     0.834    1  -3.445  0.0023
## 
## Results are averaged over the levels of: Bloodgroup 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 4 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)

The LRT prefers the simpler model with just genotype. So we are left with the choice of a simpler model (should be preferable due to small sample size), but we need to be transparent that a model that takes blood group into account would reduce the effect size of genotype on the sexual conversion rate.

anova(model, model_simple, test = "LRT")

A model with just blood group also shows some level of association with SCR, but they disappear when adjusting the p-values for multiple comparisons. So, as for genotype itself, the effects are borderline.

model <- glmmTMB(
  cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`,
        `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ Bloodgroup,
  family = betabinomial(link = "logit"),
  data = df
)

summary(model)
##  Family: betabinomial  ( logit )
## Formula:          
## cbind(`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`, `Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~  
##     Bloodgroup
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     354.3     362.3    -171.1     342.3        22 
## 
## 
## Dispersion parameter for betabinomial family (): 56.1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.18456    0.12120 -18.025   <2e-16 ***
## Bloodgroup1 -0.08303    0.25199  -0.329    0.742    
## Bloodgroup2  0.53438    0.23845   2.241    0.025 *  
## Bloodgroup3 -0.15956    0.28859  -0.553    0.580    
## Bloodgroup4  0.24596    0.21329   1.153    0.249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)

# Test for overdispersion
testDispersion(sim_res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.7923, p-value = 0.034
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$Bloodgroup)

emm <- emmeans(model, ~ Bloodgroup)
pairs(emm, adjust="bonferroni", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
##  contrast                  odds.ratio    SE  df asymp.LCL asymp.UCL null
##  Bloodgroup1 / Bloodgroup0      0.920 0.232 Inf     0.454      1.87    1
##  Bloodgroup2 / Bloodgroup0      1.706 0.407 Inf     0.874      3.33    1
##  Bloodgroup2 / Bloodgroup1      1.854 0.562 Inf     0.792      4.34    1
##  Bloodgroup3 / Bloodgroup0      0.853 0.246 Inf     0.379      1.92    1
##  Bloodgroup3 / Bloodgroup1      0.926 0.318 Inf     0.353      2.43    1
##  Bloodgroup3 / Bloodgroup2      0.500 0.167 Inf     0.196      1.28    1
##  Bloodgroup4 / Bloodgroup0      1.279 0.273 Inf     0.703      2.33    1
##  Bloodgroup4 / Bloodgroup1      1.390 0.394 Inf     0.627      3.08    1
##  Bloodgroup4 / Bloodgroup2      0.749 0.203 Inf     0.350      1.61    1
##  Bloodgroup4 / Bloodgroup3      1.500 0.475 Inf     0.617      3.65    1
##  z.ratio p.value
##   -0.329  1.0000
##    2.241  0.2502
##    2.038  0.4160
##   -0.553  1.0000
##   -0.223  1.0000
##   -2.077  0.3781
##    1.153  1.0000
##    1.160  1.0000
##   -1.062  1.0000
##    1.281  1.0000
## 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 10 estimates 
## Intervals are back-transformed from the log odds ratio scale 
## P value adjustment: bonferroni method for 10 tests 
## Tests are performed on the log odds ratio scale

Comparing to the Kruskal Wallis and Wilcoxon rank-sum test:

kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ df$Bloodgroup)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) by df$Bloodgroup
## Kruskal-Wallis chi-squared = 6.8139, df = 4, p-value = 0.1461
kruskal.test(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` / 
               (df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) ~ df$HBB.genotype)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi`/(df$`Raw counts_Sexual iRBCs_CHOLINE_Gen1_35to40hpi` + df$`Raw counts_Asexual iRBCs_CHOLINE_Gen1_35to40hpi`) by df$HBB.genotype
## Kruskal-Wallis chi-squared = 16.512, df = 4, p-value = 0.002403

4 Software used

grateful::cite_packages(output = "table", out.dir = here(), cite.tidyverse = TRUE)